################################################################################
##
## Purpose: This script calculates the tone of each utterance using the peRspective API.
##          It is not run as part of the replication, since users must provide their
##          own API key.
##
## Author: James Bisbee (james.h.bisbee@vanderbilt.edu)
##
##  - Inputs:
##    - ./data/prepped/finalData_for_NLP.RData: Prepped data from 6_DATA_intermediate_build.R
##  - Outputs:
##    - ./data/prepped/toxicity_resultsFull.RData
##
################################################################################

# Setup ----
# Clear the global environment and trigger garbage collection so that objects
# left over from earlier scripts in the same session cannot affect this run.
rm(list = ls())
gc()

# Attach dependencies with library() rather than require(): library() raises an
# error immediately when a package is not installed, whereas require() only
# warns and returns FALSE, letting the script fail later at the first `%>%` or
# prsp_stream() call with a less helpful message.
library(tidyverse)
library(peRspective)

# Fix the RNG seed for reproducibility.
set.seed(123)

# Compute details ----
# Log when and where this script was run: a dated banner, a summary of the
# hardware / running R session processes (OS dependent), and sessionInfo().
print(paste0("Compute environment from ", Sys.Date(), " run by Bisbee"))

# [[ ]] extracts the bare OS name string (no names attribute) for comparison.
os_name <- Sys.info()[["sysname"]]

if (os_name == "Windows") {
  # Each wmic call returns a header line first: drop it for the RAM listing and
  # take the second line for the CPU name / manufacturer.
  ram_size <- system("wmic MemoryChip get Capacity", intern = TRUE)[-1]
  model_name <- system("wmic cpu get name", intern = TRUE)[2] # nocov
  vendor_id <- system("wmic cpu get manufacturer", intern = TRUE)[2] # nocov

  print(list(ram = stringr::str_squish(ram_size)[1],
             vendor_id = stringr::str_squish(vendor_id),
             model_name = stringr::str_squish(model_name),
             no_of_cores = parallel::detectCores()))
} else if (os_name == "Linux") {
  # Sys.info() reports "Linux" here; the previous comparison against "Linuxs"
  # could never match, so this branch was unreachable.
  ps_lines <- system("ps -C rsession -o %cpu,%mem,pid,cmd", intern = TRUE)

  # Drop the header line, trim the column padding, and split on runs of
  # whitespace so the first three fields are always %cpu, %mem and pid
  # regardless of how wide ps pads each column. Everything after the third
  # field is the command line.
  ps_fields <- strsplit(trimws(ps_lines[-1]), "[[:space:]]+")
  df <- do.call(rbind, lapply(ps_fields, function(x) {
    data.frame(
      cpu = as.numeric(x[1]),
      mem = as.numeric(x[2]),
      pid = as.numeric(x[3]),
      cmd = paste(x[-(1:3)], collapse = " ")
    )
  }))

  # df is NULL when ps lists no rsession process (e.g. a plain Rscript run).
  # Print explicitly so the table also appears when the script is source()d.
  print(df)
} else {
  cat("If not on Linux or Windows, you'll have to figure out your own solution to seeing the compute environment.")
}

# Record the R version, platform and attached package versions in the log.
print(sessionInfo())


# Load the prepped utterance data. The pipeline below uses `finalMerge`, which
# is expected to be restored by this load() call (the workspace was cleared at
# the top of the script).
load("./data/prepped/finalData_for_NLP.RData")

# Read the Perspective API key from the environment instead of pasting it into
# the script, so that no secret has to be redacted before publication. Set it
# with e.g. `perspective_api_key=YOUR_KEY` in ~/.Renviron, or call
# Sys.setenv(perspective_api_key = "YOUR_KEY") before running this script.
api_key <- Sys.getenv("perspective_api_key")
if (!nzchar(api_key)) {
  # Fail before streaming: without a key no request can be authenticated, and
  # the run would only end after attempting every utterance.
  stop("No Perspective API key found. Set the 'perspective_api_key' ",
       "environment variable before running this script.",
       call. = FALSE)
}

# Full toxicity
# Score the `textclean` column of every row, identified by `fullInd`, against
# each of the Perspective models listed below. ungroup() drops any grouping
# carried over from the prepped data before the rows are streamed.
toxic_resFull <- finalMerge %>%
  ungroup() %>%
  prsp_stream(text = textclean, text_id = fullInd, safe_output = TRUE,
              key = api_key,
              score_model = c("TOXICITY", "SEVERE_TOXICITY",
                              "IDENTITY_ATTACK", "INSULT", "PROFANITY",
                              "SEXUALLY_EXPLICIT", "THREAT",
                              "FLIRTATION", "ATTACK_ON_AUTHOR",
                              "ATTACK_ON_COMMENTER", "INCOHERENT",
                              "INFLAMMATORY", "LIKELY_TO_REJECT",
                              "OBSCENE", "UNSUBSTANTIAL"),
              verbose = FALSE)

# Persist the scores for the downstream replication scripts.
save(toxic_resFull, file = "./data/prepped/toxicity_resultsFull.RData")
